#!/usr/bin/python
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# doxylex.py
#
# A lexer for doxygen-style comments.
#
#   Divides comments into two basic types.
# 
# ----------------------------------------------------------------------

import lex

class doxylex:
    """Lexer rules for doxygen-style comment text, built with the ``lex`` module.

    An instance is passed to ``lex.lex(module=self)`` (see ``build``), so the
    ``tokens`` tuple, ``t_ignore`` and every ``t_*`` attribute below make up
    the lexer specification.

    NOTE: the docstring of each ``t_*`` method is the regular expression of
    that rule, not documentation.  Explanations for those rules are therefore
    written as ``#`` comments above the method.
    """

    # Reserved words (doxygen command names, upper-cased).
    reserved = (
        'AUTHOR', 'FILE', 'DATE',
        'BRIEF', 'PARAM', 'RETURN', 'DEPTH', 'DETAIL', 'DETAILS', 'GVARIN', 'GVAROUT',
        'ALIAS', 'IGNORE',
        )

    tokens = reserved + (
        # keywords, and description word
        'KEY', 'WORD', 'NEWLINE', 'STAR', 'BLANKLINE'
    )

    # Completely ignored characters: space, tab and form feed.
    t_ignore = ' \t\x0c'

    # Maps the lower-case spelling of each reserved word to its token name,
    # e.g. 'brief' -> 'BRIEF'.  Built with a generator expression so that no
    # loop variable is left behind as a class attribute.
    reserved_map = dict((name.lower(), name) for name in reserved)

    def __init__(self, autobuild=1):
        """Create the rule object and, when ``autobuild`` equals 1, build the lexer.

        When ``autobuild`` is anything else, ``self.lexer`` is not created
        until ``build`` is called explicitly.
        """
        if autobuild == 1:
            self.build(lextab="doxytab")

    def build(self, **kwargs):
        """Build the lexer from this instance's rules and store it in ``self.lexer``.

        All keyword arguments are forwarded unchanged to ``lex.lex``.
        """
        self.lexer = lex.lex(module=self, **kwargs)

    def test(self, data):
        """Tokenize ``data`` with ``self.lexer`` and print each token in turn."""
        self.lexer.input(data)
        while True:
            # The lexer lives on the instance; there is no module-level
            # ``lexer`` name to fall back on.
            tok = self.lexer.token()
            if not tok:
                break
            print(tok)

    # Newlines: one or more '\n' / '\r' characters form a single NEWLINE
    # token.  Only '\n' characters advance the line counter.
    def t_NEWLINE(self, t):
        r'(\n|\r)+'
        t.lexer.lineno += t.value.count("\n")
        return t

    # Doxygen command such as "@brief".  The lookup key still carries the
    # leading '@' while the keys of reserved_map do not, so the lookup always
    # falls back to "KEY": every command is deliberately reported as a generic
    # KEY token.  Looking up t.value[1:] instead would yield the specific
    # reserved type (e.g. BRIEF).
    def t_KEY(self, t):
        r'@[A-Za-z_][\w_]*'
        t.type = self.reserved_map.get(t.value, "KEY")  # remain KEY type
        return t

    # A comment line holding nothing but the leading '*'.  The match swallows
    # the trailing newline (and, because \s also matches newlines, possibly
    # several of them), so the line counter has to be advanced here; no
    # NEWLINE token is produced for that text.  This is the only rule that can
    # start at '*', so writing it as a function does not change what it matches.
    def t_BLANKLINE(self, t):
        r'\*\s*\n'
        t.lexer.lineno += t.value.count("\n")
        return t

    # Description text: from the first character that is neither '*' nor
    # whitespace up to the end of the line.
    t_WORD = r'[^*\s].*'

    def t_error(self, t):
        """Skip one unmatched character, reporting it unless it is a '*'.

        A '*' that no rule matched is dropped silently; any other character
        is printed before being skipped.
        """
        if t.value[0] != '*':
            print("doxy illegal chr %s" % repr(t.value[0]))
        t.lexer.skip(1)

if __name__ == "__main__":
    # A simple manual check: build the lexer with the default settings and
    # hand it to lex.runmain(), then print the (still empty) cm dictionary.
    cm = dict()
    lex.runmain(doxylex().lexer)
    print(cm)
